《第25天》YOLO訓練流程與資料集YOLO txt格式

2022 iThome 鐵人賽

DAY 25

AI & Data

Object Detection and Image Processing with Python系列第 25 篇

14th鐵人賽

Ethan Chen

團隊大腦已超載

2022-10-10 22:31:25

12825 瀏覽

分享至

YOLO模型訓練流程

1. 資料集標註

1.1 說明：使用LabelImg框選出圖片中物件Bounding box，並標註其類別。

1.2 範例
2. 轉換資料集標註格式

2.1 說明：資料集標註格式可區分為：PascalVOC(xml檔)、YOLO(txt檔)、COCO格式(json檔)。

2.2 範例
- PascalVOC(xml檔)
- YOLO(txt檔)
- COCO格式(json檔)
3. 分配訓練集(train)、驗證集(val)與測試集(test)
4. 模型訓練

4.1 說明：讀取相對應的資料集與標籤，執行模型訓練。如：WongKinYiu/PyTorch_YOLOv4可讀取YOLO(txt檔)。

4.2 範例

使用LabelImg標註資料集

LabelImg

1.1 下載LabelImg

1.2 解壓縮windows_v1.8.1。

1.3 執行LabelImg(以【第3天】資料前處理-YOLOv4與自動框選中文字示範)
- 執行LabelImg後，分別點選Open Dir與Change Save Dir，設定訓練集圖檔路徑及標記Annotations(xml檔)的儲存路徑。
- 勾選選單中View的Auto Save
- 框選圖片中物件(Bounding box)，並標記其類別名稱 (快捷鍵：W拉框框、D下一張、A上一張)
- 每標記完一張圖檔都會產出一個對應的xml檔案

PascalVOC轉YOLO格式與分配資料集

流程與Python函式

1.1 PascalVOC與YOLO格式轉換公式

def convert(size, box):
    """Turn a PascalVOC pixel box into normalized YOLO (x, y, w, h) values.

    Args:
        size: Image dimensions as (width, height) in pixels.
        box: Box edges ordered (xmin, xmax, ymin, ymax) in pixels.

    Returns:
        Tuple (x_center, y_center, width, height); x and width are scaled
        by 1/width of the image, y and height by 1/height of the image.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    left, right, top, bottom = box[0], box[1], box[2], box[3]

    # The center is shifted by one pixel before scaling.
    # NOTE(review): presumably compensates for 1-based VOC pixel
    # coordinates - confirm this is wanted for LabelImg output.
    center_x = (left + right) / 2.0 - 1
    center_y = (top + bottom) / 2.0 - 1
    box_w = right - left
    box_h = bottom - top

    return (center_x * scale_x, center_y * scale_y,
            box_w * scale_x, box_h * scale_y)

1.2 讀取PascalVOC(xml)，轉換後儲存成YOLO(txt)

def convert_xml_to_voc(xmlPath):
    """Convert PascalVOC XML annotation files into YOLO txt label files.

    Despite the function name, the output is YOLO format: one line per kept
    object, "<class_id> <x> <y> <w> <h>", with the numbers produced by
    convert(). Objects whose class name is not in the module-level
    ``classes`` list, or that are flagged difficult, are skipped.

    The label file for ``.../xmls/name.xml`` is written to
    ``.../images/name.txt`` and is overwritten on every run.

    Args:
        xmlPath: Iterable of paths to PascalVOC XML files.
    """
    for xml in xmlPath:
        root = ET.parse(xml).getroot()

        # Rewrite only the directory part ('xmls' -> 'images') and only the
        # real extension, so file names that happen to contain 'xmls' or
        # '.xml' are left intact.
        xml_dir, xml_name = os.path.split(xml)
        filename = os.path.join(xml_dir.replace('xmls', 'images'),
                                os.path.splitext(xml_name)[0] + '.txt')

        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        # Collect one label line per annotated bounding box.
        lines = []
        for obj in root.iter('object'):
            # A missing or empty <difficult> tag counts as "not difficult".
            difficult = obj.findtext('difficult') or '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text),
                 float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text),
                 float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            lines.append(str(cls_id) + " " +
                         " ".join([str(a) for a in bb]) + '\n')

        # Write mode (not append): re-running must not duplicate labels.
        with open(filename, "w") as bbox:
            bbox.writelines(lines)
    print('1. 將標籤從xml轉換成voc格式：完成')
    print('-'*50)

1.3 將圖片與YOLO(txt)標籤依照比例分配train與val

def train_val_split(imagePath, ratio):
    """Randomly split image paths into train and validation file lists.

    Args:
        imagePath: List of image file paths. It is not modified.
        ratio: Fraction of the images assigned to the training split; the
            remainder goes to validation.

    Returns:
        Tuple (train_list, val_list). Each list contains the image paths of
        that split followed by the matching label paths, i.e. the same path
        with its extension replaced by '.txt'.
    """
    # Shuffle a copy so the caller's list keeps its original order.
    shuffled = list(imagePath)
    random.shuffle(shuffled)

    # Split by ratio.
    train_num = int(len(shuffled) * ratio)
    train_pic = shuffled[:train_num]
    val_pic = shuffled[train_num:]

    # splitext swaps only the real extension, whatever it is (.jpeg, .JPG,
    # ...); a substring replace would miss those and list the image twice.
    train_voc = [os.path.splitext(i)[0] + '.txt' for i in train_pic]
    val_voc = [os.path.splitext(i)[0] + '.txt' for i in val_pic]

    train_list = train_pic + train_voc
    val_list = val_pic + val_voc
    print('2. 分配train與val資料集：完成')
    print('-'*50)

    return train_list, val_list

1.4 移動圖片與YOLO(txt)標籤到train與val資料夾

def split_images_to_train_and_val(source, train_list, val_list):
    """Move the listed files into ``source/train`` and ``source/val``.

    Args:
        source: Dataset root directory; the 'train' and 'val' folders are
            created inside it when missing.
        train_list: Paths of files (images and labels) for the train split.
        val_list: Paths of files (images and labels) for the val split.
    """
    # Create the train and val folders.
    train_dir = os.path.join(source, 'train')
    val_dir = os.path.join(source, 'val')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    # Build each destination from the folder that was just created plus the
    # file's base name. Replacing the substring 'images' in the whole path
    # would also rewrite file names such as 'images_001.jpg'.
    for target_dir, files in ((train_dir, train_list), (val_dir, val_list)):
        for move_it in files:
            shutil.move(move_it,
                        os.path.join(target_dir, os.path.basename(move_it)))
    print('3. 移動圖片與voc標籤到train與val資料夾：完成')

完整程式碼

import xml.etree.ElementTree as ET
import os
import random
import shutil

def convert(size, box):
    """Turn a PascalVOC pixel box into normalized YOLO (x, y, w, h) values.

    Args:
        size: Image dimensions as (width, height) in pixels.
        box: Box edges ordered (xmin, xmax, ymin, ymax) in pixels.

    Returns:
        Tuple (x_center, y_center, width, height); x and width are scaled
        by 1/width of the image, y and height by 1/height of the image.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    left, right, top, bottom = box[0], box[1], box[2], box[3]

    # The center is shifted by one pixel before scaling.
    # NOTE(review): presumably compensates for 1-based VOC pixel
    # coordinates - confirm this is wanted for LabelImg output.
    center_x = (left + right) / 2.0 - 1
    center_y = (top + bottom) / 2.0 - 1
    box_w = right - left
    box_h = bottom - top

    return (center_x * scale_x, center_y * scale_y,
            box_w * scale_x, box_h * scale_y)

def convert_xml_to_voc(xmlPath):
    """Convert PascalVOC XML annotation files into YOLO txt label files.

    Despite the function name, the output is YOLO format: one line per kept
    object, "<class_id> <x> <y> <w> <h>", with the numbers produced by
    convert(). Objects whose class name is not in the module-level
    ``classes`` list, or that are flagged difficult, are skipped.

    The label file for ``.../xmls/name.xml`` is written to
    ``.../images/name.txt`` and is overwritten on every run.

    Args:
        xmlPath: Iterable of paths to PascalVOC XML files.
    """
    for xml in xmlPath:
        root = ET.parse(xml).getroot()

        # Rewrite only the directory part ('xmls' -> 'images') and only the
        # real extension, so file names that happen to contain 'xmls' or
        # '.xml' are left intact.
        xml_dir, xml_name = os.path.split(xml)
        filename = os.path.join(xml_dir.replace('xmls', 'images'),
                                os.path.splitext(xml_name)[0] + '.txt')

        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        # Collect one label line per annotated bounding box.
        lines = []
        for obj in root.iter('object'):
            # A missing or empty <difficult> tag counts as "not difficult".
            difficult = obj.findtext('difficult') or '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text),
                 float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text),
                 float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            lines.append(str(cls_id) + " " +
                         " ".join([str(a) for a in bb]) + '\n')

        # Write mode (not append): re-running must not duplicate labels.
        with open(filename, "w") as bbox:
            bbox.writelines(lines)
    print('1. 將標籤從xml轉換成voc格式：完成')
    print('-'*50)

# Split the images into train and val according to a ratio
def train_val_split(imagePath, ratio):
    """Randomly split image paths into train and validation file lists.

    Args:
        imagePath: List of image file paths. It is not modified.
        ratio: Fraction of the images assigned to the training split; the
            remainder goes to validation.

    Returns:
        Tuple (train_list, val_list). Each list contains the image paths of
        that split followed by the matching label paths, i.e. the same path
        with its extension replaced by '.txt'.
    """
    # Shuffle a copy so the caller's list keeps its original order.
    shuffled = list(imagePath)
    random.shuffle(shuffled)

    # Split by ratio.
    train_num = int(len(shuffled) * ratio)
    train_pic = shuffled[:train_num]
    val_pic = shuffled[train_num:]

    # splitext swaps only the real extension, whatever it is (.jpeg, .JPG,
    # ...); a substring replace would miss those and list the image twice.
    train_voc = [os.path.splitext(i)[0] + '.txt' for i in train_pic]
    val_voc = [os.path.splitext(i)[0] + '.txt' for i in val_pic]

    train_list = train_pic + train_voc
    val_list = val_pic + val_voc
    print('2. 分配train與val資料集：完成')
    print('-'*50)

    return train_list, val_list

# Move images and labels into the train and val folders
def split_images_to_train_and_val(source, train_list, val_list):
    """Move the listed files into ``source/train`` and ``source/val``.

    Args:
        source: Dataset root directory; the 'train' and 'val' folders are
            created inside it when missing.
        train_list: Paths of files (images and labels) for the train split.
        val_list: Paths of files (images and labels) for the val split.
    """
    # Create the train and val folders.
    train_dir = os.path.join(source, 'train')
    val_dir = os.path.join(source, 'val')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    # Build each destination from the folder that was just created plus the
    # file's base name. Replacing the substring 'images' in the whole path
    # would also rewrite file names such as 'images_001.jpg'.
    for target_dir, files in ((train_dir, train_list), (val_dir, val_list)):
        for move_it in files:
            shutil.move(move_it,
                        os.path.join(target_dir, os.path.basename(move_it)))
    print('3. 移動圖片與voc標籤到train與val資料夾：完成')

if __name__ == '__main__':
    # Dataset root; expected to contain classes.txt, xmls/ and images/.
    # NOTE(review): 'dataests2' looks like a typo of 'datasets2' - confirm
    # against the actual folder name before changing it.
    source = './dataests2/'
    # Read the class names (whitespace-separated). `classes` must stay a
    # module-level name because convert_xml_to_voc() reads it as a global.
    with open(os.path.join(source, 'classes.txt'), encoding='utf-8') as f:
        classes = f.read().strip().split()
    # Annotation folder; keep only XML files so stray files in the folder
    # are never handed to the XML parser.
    xmlDir = os.path.join(source, 'xmls/')
    xmlPath = [os.path.join(xmlDir, i) for i in os.listdir(xmlDir)
               if i.lower().endswith('.xml')]
    # Image folder; skip label files left over from an earlier run so they
    # are not treated as images.
    imageDir = os.path.join(source, 'images/')
    imagePath = [os.path.join(imageDir, i) for i in os.listdir(imageDir)
                 if not i.lower().endswith('.txt')]

    convert_xml_to_voc(xmlPath)
    train_list, val_list = train_val_split(imagePath, 0.8)
    split_images_to_train_and_val(source, train_list, val_list)